#include <float.h>

#include "../log.h"

#include "mips.h"
#include "vfpuutil.h"

/* Access slot i of the VFPU register file held in the global cpu state. */
#define V(i)  (cpu.v[i])
/*
 * Same slot as V(i), reinterpreted as a raw u32 through a pointer cast.
 * NOTE(review): this is pointer-cast type punning; if cpu.v is an array of
 * float it relies on the build tolerating aliasing violations -- confirm.
 */
#define VI(i) (*(u32*)(&cpu.v[i]))

/*
 * Per-lane write mask, installed by SetWriteMask() and consulted by
 * WriteMatrix(): a non-zero entry suppresses the stores for that lane.
 * Zero-initialised, i.e. all lanes writable.
 */
static s32 writeMask[4] = {0};

/*
 * Install a new write mask.
 *
 * wm: four entries that are copied, in order, into the file-level writeMask
 *     array. The caller keeps ownership of wm; only its values are read.
 */
void SetWriteMask(s32 wm[4])
{
    s32 lane = 0;

    while (lane < 4)
    {
        writeMask[lane] = wm[lane];
        lane++;
    }
}

/*
 * Copy a vector or matrix operand out of the VFPU register file into rd.
 *
 * rd:             destination buffer; element (a, b) of the operand is stored
 *                 at rd[b * outMatrixWidth + a].
 * size:           operand shape; selects how many elements are read and how
 *                 the starting line is decoded from reg.
 * outMatrixWidth: stride, in floats, between consecutive b-lines of rd.
 * reg:            encoded register operand: bits 2-4 select the matrix,
 *                 bits 0-1 the first starting offset, bit 5 the transpose
 *                 flag, and bits 5-6 (shape dependent) the second offset.
 *
 * For V_Invalid an error is logged and nothing is read.
 */
void ReadMatrix(float *rd, DataSize size, s32 outMatrixWidth, s32 reg)
{
    const s32 base = ((reg >> 2) & 7) * 4;
    const s32 firstLo = reg & 3;
    const s32 swapAxes = (reg >> 5) & 1;
    s32 firstHi = 0;
    s32 outerCount = 0;
    s32 innerCount = 0;
    s32 outer, inner;

    /* Shape -> iteration counts. */
    switch (size)
    {
    case V_Single: outerCount = 1; innerCount = 1; break;
    case V_Pair:   outerCount = 2; innerCount = 1; break;
    case V_Triple: outerCount = 3; innerCount = 1; break;
    case V_Quad:   outerCount = 4; innerCount = 1; break;
    case V_2x2:    outerCount = 2; innerCount = 2; break;
    case V_3x3:    outerCount = 3; innerCount = 3; break;
    case V_4x4:    outerCount = 4; innerCount = 4; break;

    case V_Invalid:
        _log(ERR, VFPU, "Error: invalid size in ReadMatrix() !");
        break;
    }

    /* The second starting offset is encoded differently per operand length. */
    if (outerCount == 1)
        firstHi = (reg >> 5) & 3;
    else if (outerCount == 3)
        firstHi = (reg >> 6) & 1;
    else if (outerCount != 0)
        firstHi = (reg >> 5) & 2;

    for (outer = 0; outer < outerCount; outer++)
    {
        for (inner = 0; inner < innerCount; inner++)
        {
            /* The transpose flag swaps which loop index steps which offset. */
            const s32 loStep = swapAxes ? outer : inner;
            const s32 hiStep = swapAxes ? inner : outer;

            rd[inner * outMatrixWidth + outer] =
                V(base + ((firstLo + loStep) & 3) + ((firstHi + hiStep) & 3) * 32);
        }
    }
}

/*
 * Store a vector or matrix operand from rd into the VFPU register file.
 *
 * rd:            source buffer; element (a, b) of the operand is taken from
 *                rd[b * inMatrixWidth + a].
 * size:          operand shape; selects how many elements are written and how
 *                the starting row is decoded from reg.
 * inMatrixWidth: stride, in floats, between consecutive b-lines of rd.
 * reg:           encoded register operand: bits 2-4 select the matrix,
 *                bits 0-1 the starting column, bit 5 the transpose flag, and
 *                bits 5-6 (shape dependent) the starting row.
 *
 * Stores for outer index a are skipped entirely when writeMask[a] is
 * non-zero. For V_Invalid an error is logged and nothing is written.
 */
void WriteMatrix(const float *rd, DataSize size, s32 inMatrixWidth, s32 reg)
{
    const s32 base = ((reg >> 2) & 7) * 4;
    const s32 firstCol = reg & 3;
    const s32 swapAxes = (reg >> 5) & 1;
    s32 firstRow = 0;
    s32 outerCount = 0;
    s32 innerCount = 0;
    s32 outer, inner;

    /* Shape -> iteration counts. */
    switch (size)
    {
    case V_Single: outerCount = 1; innerCount = 1; break;
    case V_Pair:   outerCount = 2; innerCount = 1; break;
    case V_Triple: outerCount = 3; innerCount = 1; break;
    case V_Quad:   outerCount = 4; innerCount = 1; break;
    case V_2x2:    outerCount = 2; innerCount = 2; break;
    case V_3x3:    outerCount = 3; innerCount = 3; break;
    case V_4x4:    outerCount = 4; innerCount = 4; break;

    case V_Invalid:
        _log(ERR, VFPU, "Error: invalid size in WriteMatrix() !");
        break;
    }

    /* The starting row is encoded differently per operand length. */
    if (outerCount == 1)
        firstRow = (reg >> 5) & 3;
    else if (outerCount == 3)
        firstRow = (reg >> 6) & 1;
    else if (outerCount != 0)
        firstRow = (reg >> 5) & 2;

    for (outer = 0; outer < outerCount; outer++)
    {
        /* The mask is indexed by the outer index only, so test it once. */
        if (writeMask[outer])
            continue;

        for (inner = 0; inner < innerCount; inner++)
        {
            /* The transpose flag swaps which loop index steps which axis. */
            const s32 colStep = swapAxes ? outer : inner;
            const s32 rowStep = swapAxes ? inner : outer;

            V(base + ((firstCol + colStep) & 3) + ((firstRow + rowStep) & 3) * 32) =
                rd[inner * inMatrixWidth + outer];
        }
    }
}

/*
 * Number of scalar elements held by an operand of shape sz.
 *
 * Returns 1-4 for the vector shapes, 4/9/16 for the square matrices, and 0
 * for anything else. V_Invalid additionally logs an error.
 */
s32 GetNumElements(DataSize sz)
{
    s32 count = 0;

    switch (sz)
    {
    case V_Single: count = 1;  break;
    case V_Pair:   count = 2;  break;
    case V_Triple: count = 3;  break;
    case V_Quad:   count = 4;  break;
    case V_2x2:    count = 4;  break;
    case V_3x3:    count = 9;  break;
    case V_4x4:    count = 16; break;

    case V_Invalid:
        _log(ERR, VFPU, "Error: invalid size in GetNumElements() !");
        break;
    }

    return count;
}

/*
 * Map a shape to its "half" shape.
 *
 * V_Pair and V_2x2 map to V_Single, V_Quad maps to V_Pair, and V_4x4 maps to
 * V_2x2. Every other shape has no half and yields V_Invalid.
 */
DataSize GetHalfSize(DataSize sz)
{
    if (sz == V_Pair || sz == V_2x2)
        return V_Single;

    if (sz == V_Quad)
        return V_Pair;

    if (sz == V_4x4)
        return V_2x2;

    return V_Invalid;
}

/*
 * Decode the vector size selected by an opcode.
 *
 * Bit 7 of op is the low selector bit and bit 15 the high one; the resulting
 * value 0..3 picks V_Single, V_Pair, V_Triple or V_Quad respectively.
 */
DataSize GetVecSize(u32 op)
{
    static const DataSize bySelector[4] = { V_Single, V_Pair, V_Triple, V_Quad };
    const u32 lowBit = (op >> 7) & 1;
    const u32 highBit = (op >> 15) & 1;

    /* The selector is two bits wide, so the table covers every value. */
    return bySelector[lowBit | (highBit << 1)];
}

/*
 * Decode the matrix size selected by an opcode.
 *
 * Bit 7 of op is the low selector bit and bit 15 the high one. Selector
 * values 1, 2 and 3 pick V_2x2, V_3x3 and V_4x4; selector 0 has no matrix
 * shape and yields V_Invalid.
 */
DataSize GetMtxSize(u32 op)
{
    static const DataSize bySelector[4] = { V_Invalid, V_2x2, V_3x3, V_4x4 };
    const u32 lowBit = (op >> 7) & 1;
    const u32 highBit = (op >> 15) & 1;

    /* The selector is two bits wide, so the table covers every value. */
    return bySelector[lowBit | (highBit << 1)];
}

/*
 * Side length of a square matrix shape: 2, 3 or 4 for V_2x2, V_3x3 and
 * V_4x4. Any other shape (vectors included) yields 0.
 */
s32 GetMatrixSide(DataSize sz)
{
    if (sz == V_2x2)
        return 2;

    if (sz == V_3x3)
        return 3;

    if (sz == V_4x4)
        return 4;

    return 0;
}

/*
 * Convert a 16-bit half-precision bit pattern to a 32-bit float.
 *
 * l: raw half-precision value. The sign, exponent and fraction fields are
 *    extracted with the VFPU_*_FLOAT16_* constants; the +112 rebias and the
 *    13-bit fraction shift below assume the usual 1/5/10-bit layout with an
 *    exponent bias of 15 (112 == 127 - 15, 13 == 23 - 10).
 *
 * Returns:
 *   - exponent field at its maximum (Inf/NaN encodings): a warning is logged;
 *     a zero fraction yields FLT_MAX and a non-zero fraction yields 0. The
 *     sign bit is not applied on this path.
 *     NOTE(review): kept as in the original; confirm against hardware.
 *   - exponent and fraction both zero: a zero carrying the input's sign.
 *   - otherwise: the same value as a single-precision float. Subnormal
 *     inputs (exponent field 0, fraction != 0) are renormalised first.
 */
float Float16ToFloat32(u16 l)
{
    union float2s32 {
        u32 i;
        float f;
    } float2int;

    u16 float16 = l;
    u32 sign = (float16 >> VFPU_SH_FLOAT16_SIGN) & VFPU_MASK_FLOAT16_SIGN;
    s32 exponent = (float16 >> VFPU_SH_FLOAT16_EXP) & VFPU_MASK_FLOAT16_EXP;
    u32 fraction = float16 & VFPU_MASK_FLOAT16_FRAC;

    float signf = (sign == 1) ? -1.0f : 1.0f;

    float f;
    if (exponent == VFPU_FLOAT16_EXP_MAX)
    {
        _log(WRN, VFPU, "Mmmh, there's maybe bugs here...");
        if (fraction == 0)
            f = FLT_MAX;
        else
            f = 0;
    }
    else if (exponent == 0 && fraction == 0)
        f = 0.0f * signf;
    else
    {
        if (exponent == 0)
        {
            /*
             * Subnormal: the value is 0.fraction * 2^(1 - bias), i.e. it
             * shares the scale of exponent field 1, not 0. Start from 1 so
             * that each normalising shift lands on the right exponent;
             * starting from 0 would halve every subnormal input.
             */
            exponent = 1;
            do
            {
                fraction <<= 1;
                exponent--;
            } while (!(fraction & (VFPU_MASK_FLOAT16_FRAC + 1)));

            /* Drop the now-implicit leading one. */
            fraction &= VFPU_MASK_FLOAT16_FRAC;
        }

        /* Convert to 32-bit single-precision IEEE754. */
        float2int.i = sign << 31;
        float2int.i |= (u32)(exponent + 112) << 23;
        float2int.i |= fraction << 13;
        f = float2int.f;
    }
    return f;
}
